////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///  Table 2: OLS estimation (Dependent Variable: Squared Error in Assessments), Rounds 38-75

// Table 2 uses the late-round file (per the section header: rounds 38-75).
use "$data/eoydata_late.dta", clear

// Diagnostic tabulations on a single round (one row per subject).
// The original filtered on round == 1, which selects nothing in the
// late-round file; round 38 is the first round of this sample.
tab risk if round == 38
tab round if round == 38

// Dependent variable: squared error of the assessment.
gen syerror = (value - guess)^2

// (A) treatment dummies only, filtered sample
reg syerror d_ng d_og d_sf if filteredsample ==1, cluster(id)
est store A
//this is to compare the Baseline to the Bayesian benchmark
test _cons ==46

// (B) treatment dummies only, all subjects
reg syerror d_ng d_og d_sf, cluster(id)
est store B

// (C) adds the risk measure, filtered sample
reg syerror d_ng d_og d_sf risk  if filteredsample ==1, cluster(id)
est store C

// (D) adds the risk measure, all subjects
reg syerror d_ng d_og d_sf risk, cluster(id)
est store D


// Row labels picked up by esttab's -label- option.
label variable d_ng "NoGroup"
label variable d_og "OneGroup"
label variable d_sf "SignalFirst"
label variable risk "Risk measure"


// Column order A C B D: filtered-sample models first, then all subjects.
esttab A C B D using "$text/files/reg/mse_reg1_wrisk_late.tex", se ///
mtitles("(1)" "(2)" "(3)" "(4)") ///
eqlabels(none) ///
order(d_ng d_og d_sf) ///
booktabs  star(* 0.10 ** 0.05 *** 0.01) ///
nonumbers nonotes ///
substitute(\_ _) ///
addnotes("Standard errors (clustered at the subject level) in parentheses." ///
 "$^{***}$1\%, $^{**}$5\%, $^{*}$10\% significance." ///
 "Constant shows MSE at Baseline at risk measure of zero." ///
 "Dummies shows difference relative to Baseline." ///
 "Lower values for risk measure correspond to higher risk aversion." ///
  "Risk measure missing for 5 subjects." ///
 "(1) and (2): Subjects with MSE less than or equal to 200." ///
 "(3) and (4): All subjects." ///
)  label replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	Table 3: Likelihood of Higher Assessment when ti − tj = 0.
// Table 4: Likelihood of Higher Assessment when ti − tj = 5.
// Table 5: Likelihood of Higher Assessment when ti − tj = 10.


// Alternative measures of discrimination
use "$data/eoydata_late.dta", clear
generate deltavalue = round - 38

// Pool the signal variance: average of the high-group and low-group
// values within each treatment.
generate vs_high = var_signal * highgroup
generate vs_low  = var_signal * (1 - highgroup)
egen vxh = max(vs_high), by(treatment)
egen vxl = max(vs_low),  by(treatment)
drop vs_high vs_low
replace var_signal = (vxh + vxl) / 2

// Only the three value gaps reported in Tables 3-5.
keep if inlist(deltavalue, 0, 5, 10)

// Shared pieces of the win-probability formulas, held in double so the
// results match evaluating the full expression inline.
generate double zscale      = sqrt(2 * var_signal)
generate double prior_shift = (alpha / (1 - alpha)) * 20
generate byte   is_nogroup  = treatmentname == "NoGroup"

// Same group: bias and prior terms are multiplied by zero.
generate winsamegroup = normal((bdelta*0 + (alpha/(1-alpha))*0 + deltavalue) / zscale)
// High vs. low group member; the prior-mean shift is switched off in NoGroup.
generate winhighgroup = normal(( bdelta + cond(is_nogroup, 0, prior_shift) + deltavalue) / zscale)
generate winlowgroup  = normal((-bdelta - cond(is_nogroup, 0, prior_shift) + deltavalue) / zscale)

// Average probabilities by treatment and value gap.
collapse winsamegroup winhighgroup winlowgroup, by(treatmentname deltavalue)
sort deltavalue treatmentname
	
	
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////		
/// Testing Linearity

//net install binsreg, from("https://raw.githubusercontent.com/nppackages/binsreg/master/stata") replace
// Linearity test of guess on value in Baseline (filtered sample),
// run separately for the high group and then the low group.
use "$data/eoydata_late.dta", clear
keep if filteredsample ==1
foreach g in 1 0 {
    binstest guess value if treatmentname =="Baseline" & highgroup ==`g', binsmethod(rot) testmodelpoly(1)
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Table 6: Model Estimates (Median Values)
// this is for reporting median values on the individual-level and testing distributions
//// This replicates the main table using median values
use "$data/eoydata_late.dta", clear

// Keep a single row per subject-by-group cell.
by id highgroup, sort: generate round_bygroup = _n
keep if round_bygroup == 1

// Median of each individual-level estimate by treatment and group.
local indiv   alpha_i bias_i var_signal_i alpha_opt_i
local medians malpha  mbias  mvs          malphaopt
forvalues k = 1/4 {
    local src : word `k' of `indiv'
    local out : word `k' of `medians'
    egen `out' = median(`src'), by(treatment highgroup)
}

// Display the medians, treatment by treatment.
foreach out of local medians {
    bysort treatmentname: tabulate `out' highgroup
}

// Rank-sum tests: high vs. low group within each treatment.
foreach v in alpha_i bias_i {
    bysort treatmentname: ranksum `v', by(highgroup)
}

// Stack a copy of the data in which alpha_i is replaced by alpha_opt_i,
// then test alpha_i against alpha_opt_i within treatment and group.
expand 2, generate(dupindicator)
replace alpha_i = alpha_opt_i if dupindicator == 1

bysort treatmentname highgroup: ranksum alpha_i, by(dupindicator)




/// for statistical statements on the median value
// Cluster bootstrap (500 draws) of the median signal variance by treatment
// and group. Each draw re-estimates alpha and bias, rebuilds the implied
// signal, and stores the resulting median as constants gd<j>h_<i> / gd<j>l_<i>
// (j = treatment, i = draw) on the full data set.
use "$data/eoydata_late.dta", clear
*use "$data/dat_sy1_west.dta", clear
tab treatment treatmentname 
//1. bl, 2. sf, 3.ng, 4. og
drop if filteredsample ==0

// Rows per treatment divided by 38 (the commented alternative divides by 75).
gen foo = 1 
egen tfoo = sum(foo), by(treatment)
drop foo
gen tnobs = tfoo/38
*gen tnobs = tfoo/75

// Regressors for the no-constant model estimated inside the loop.
gen diffv_h_bl = diff_v*highgroup
gen diffv_l_bl = diff_v*(1-highgroup)
replace d_bl = 1


set seed 666
forvalues i = 1/500 {
    preserve
    // NOTE(review): tnobs is a variable used as the sample-size expression;
    // confirm it yields the intended size in every treatment stratum.
    bsample tnobs, strata(treatment) cluster(id)

    // Re-estimate alpha (slopes) and bias (intercepts) per treatment.
    forvalues j = 1/4 {
        reg diff diffv_h_bl diffv_l_bl d_bl highgroup if treatment == `j', nocons cluster(id)
        matrix b = e(b)
        replace alpha = b[1,1] if highgroup ==1 & treatment == `j'
        replace alpha = b[1,2] if highgroup ==0 & treatment == `j'
        replace bias = b[1,3] + b[1,4] if highgroup ==1 & treatment == `j'
        replace bias = b[1,3] if highgroup ==0 & treatment == `j'
    }
    replace signal = (guess - bias - alpha*mean_theory) / (1- alpha)

    // This is for variance
    gen sigerror = signal - value
    gen foo = (sigerror)^2
    // Mean squared signal error per subject within treatment and group ...
    egen vs_x = mean(foo), by(highgroup treatment id)
    // ... then the median of those values within treatment and group.
    // The original line was "replace vs_x = median(vs_x), by(...)", which is
    // not valid Stata: -replace- has no by() option and median() is an egen
    // function.
    // NOTE(review): the median is taken over rows, so subjects are weighted
    // by their number of rows in the draw; confirm this is intended.
    egen vs_med = median(vs_x), by(highgroup treatment)
    replace vs_x = vs_med
    drop vs_med

    // This is for the Delta_g graph
    *gen foo = bias/(1-alpha)
    *egen vs_x = mean(foo), by(highgroup treatment)

    // This is for the alpha opt - alpha graph
    *gen foo = (sigerror)^2
    *egen vs_x = mean(foo), by(highgroup treatment)
    *replace vs_x = (1/var_theory)/ ((1/vs_x) + (1/var_theory)) - alpha

    // Read the (constant-within-cell) statistic for each treatment x group
    // into locals so it survives -restore-.
    forvalues j = 1/4 {
        summarize vs_x if treatment == `j' & highgroup == 1, meanonly
        local xxxh`j' = r(max)
        summarize vs_x if treatment == `j' & highgroup == 0, meanonly
        local xxxl`j' = r(max)
    }
    restore

    // Store this draw's statistics as constants on the original data.
    forvalues j = 1/4 {
        gen gd`j'h_`i' = `xxxh`j''
        gen gd`j'l_`i' = `xxxl`j''
    }
}

// Point estimate (diff_orig) that the bootstrap draws are compared against.
// alpha_opt vs alpha
*gen diff_orig = alpha_opt - alpha
// for signal variance
gen diff_orig = var_signal
// for delta_g
*gen diff_orig =  bias/(1- alpha)

// One row per treatment x group, then one row per treatment x group x draw.
collapse gd*  alpha alpha_opt bias var_signal diff_orig, by(treatment treatmentname highgroup)

reshape long gd1h_ gd2h_ gd3h_ gd4h_ gd1l_ gd2l_ gd3l_ gd4l_ , i(treatment treatmentname highgroup) j(index)  

foreach stub in gd1h gd2h gd3h gd4h gd1l gd2l gd3l gd4l {
    rename `stub'_ `stub'
}

// Pick the bootstrap column that belongs to each row's own treatment and group.
// 999 remains only for rows that match none of the eight cells.
gen var_signal_boot = 999
forvalues j = 1/4 {
    replace var_signal_boot = gd`j'h if treatment == `j' & highgroup == 1
    replace var_signal_boot = gd`j'l if treatment == `j' & highgroup == 0
}


tab var_signal treatmentname if highgroup ==1
tab var_signal treatmentname if highgroup ==0

gen diff = var_signal_boot
gen lowgroup = 1-highgroup


bysort treatmentname highgroup: summarize diff, detail

// 5th and 95th percentiles of the bootstrap distribution per cell.
// by() is used instead of a by: prefix so no prior sort order is required.
egen ub = pctile(diff), p(95) by(treatmentname highgroup)
egen lb = pctile(diff), p(5) by(treatmentname highgroup)


// p-values for whether values are different from zero
preserve
// Centre the draws on the point estimate; count draws at least as extreme.
replace diff = diff - diff_orig
gen dummy = 0
replace dummy = 1 if diff< -abs(diff_orig) |  diff> abs(diff_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
bysort treatmentname highgroup: summarize pvalue
restore

// p-values for whether values are different between high and low groups
preserve
// High-group draw, copied onto every row of the same treatment and draw.
// The helper is non-missing only on rows with lowgroup != 1, so max(foo)
// returns that group's value. The original took max(diff), i.e. the larger
// of the two groups, which made the difference below never negative.
gen foo = diff if lowgroup != 1
egen diffhigh = max(foo), by(treatmentname index)
drop foo
// Same for the point estimate (original used max(diff_orig)).
gen foo = diff_orig if lowgroup != 1
egen diffhigh_orig = max(foo), by(treatmentname)
drop foo
drop if highgroup ==1
gen diffnew = diffhigh - diff
gen diffnew_orig = diffhigh_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname)
bysort treatmentname: summarize pvalue
restore

// p-values for whether values are different between treatments
//1. bl, 2. sf, 3.ng, 4. og
preserve
// Treatment-1 draw, copied onto every row of the same group and draw.
// The original took max(diff), i.e. the largest value across all four
// treatments rather than the treatment-1 value.
gen foo = diff if treatment <= 1
egen diffbl = max(foo), by(highgroup index)
drop foo
// Same for the point estimate (original used max(diff_orig)).
gen foo = diff_orig if treatment <= 1
egen diffbl_orig = max(foo), by(highgroup)
drop foo
drop if treatmentname =="Baseline"
gen diffnew = diffbl - diff
gen diffnew_orig = diffbl_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
bysort treatmentname highgroup: summarize pvalue
restore



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Table 7: OLS estimation (Dependent Variable: Squared Error in Assessments), All Rounds

// Table 7 is the all-rounds version of the squared-error regressions, so it
// loads the full-sample file. The original loaded eoydata_late.dta, which
// does not match the "All Rounds" title, the round == 1 checks below, or the
// output file name (no _late suffix).
use "$data/eoydata.dta", clear

// Diagnostic tabulations on the first round (one row per subject).
tab risk if round ==1
tab round if round ==1

// Dependent variable: squared error of the assessment.
gen syerror = (value - guess)^2

// (A) treatment dummies only, filtered sample
reg syerror d_ng d_og d_sf if filteredsample ==1, cluster(id)
est store A
//this is to compare the Baseline to the Bayesian benchmark
test _cons ==46

// (B) treatment dummies only, all subjects
reg syerror d_ng d_og d_sf, cluster(id)
est store B

// (C) adds the risk measure, filtered sample
reg syerror d_ng d_og d_sf risk  if filteredsample ==1, cluster(id)
est store C

// (D) adds the risk measure, all subjects
reg syerror d_ng d_og d_sf risk, cluster(id)
est store D


// Row labels picked up by esttab's -label- option.
label variable d_ng "NoGroup"
label variable d_og "OneGroup"
label variable d_sf "SignalFirst"
label variable risk "Risk measure"

// Column order A C B D: filtered-sample models first, then all subjects.
esttab A C B D using "$text/files/reg/mse_reg1_wrisk.tex", se ///
mtitles("(1)" "(2)" "(3)" "(4)") ///
eqlabels(none) ///
order(d_ng d_og d_sf) ///
booktabs  star(* 0.10 ** 0.05 *** 0.01) ///
nonumbers nonotes ///
substitute(\_ _) ///
addnotes("Standard errors (clustered at the subject level) in parentheses." ///
 "$^{***}$1\%, $^{**}$5\%, $^{*}$10\% significance." ///
 "Constant shows MSE at Baseline at risk measure of zero." ///
 "Dummies shows difference relative to Baseline." ///
 "Lower values for risk measure correspond to higher risk aversion." ///
  "Risk measure missing for 5 subjects." ///
 "(1) and (2): Subjects with MSE less than or equal to 200." ///
 "(3) and (4): All subjects." ///
)  label replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Table 8: Model Estimates (All Rounds)
// See Tables_final.do and run the code for the main table, replacing the data file with the full sample.



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Table 9: Change in Model Estimates by Treatment
// Table 9: per-treatment model estimates on the full sample, with every
// coefficient interacted with a late-round (round > 37) indicator.
use "$data/eoydata.dta", clear
keep if filteredsample ==1
gen early = round < 38
gen late = round >37

gen lowgroup = 1- highgroup
gen xxdiff = guess - value
gen xxdiffv_l = (mean_theory - value)*lowgroup
gen xxdiffv_h = (mean_theory - value)*highgroup

gen ptype = 1


// Late-round interactions of the slope and intercept terms.
gen xdiff_vh_l = (mean_theory - value)*highgroup*late
gen xdiff_vl_l = (mean_theory - value)*lowgroup*late
gen highgroup_l = highgroup*late
gen lowgroup_l = lowgroup*late

gen xxl = xxdiffv_l
gen xxh = xxdiffv_h


// Same specification for each treatment, run in the original order
// (Baseline, SignalFirst, NoGroup, OneGroup) so the sequence of bootstrap
// draws is unchanged.
// NOTE(review): no seed is set before these bootstrap runs, so the standard
// errors depend on the random-number state at this point.
// NOTE(review): cluster(id) is combined with vce(bootstrap) exactly as in the
// original; confirm the resampling is done at the subject level.
local rhs      xxh xxl highgroup lowgroup xdiff_vh_l xdiff_vl_l highgroup_l lowgroup_l
local arms     Baseline SignalFirst NoGroup OneGroup
local storeas  BL_high  SF_high     NG_high OG_high
forvalues k = 1/4 {
    local arm  : word `k' of `arms'
    local name : word `k' of `storeas'
    reg diff `rhs' if ptype ==1 &treatmentname =="`arm'", nocons cluster(id) vce(bootstrap)
    est store `name'
}


label variable lowgroup "B_l"
label variable highgroup "B_h"
label variable xxl "$\omega_l$"
label variable xxh "$\omega_h$"
// "$B_l" and "$B_h" inside a string are read as global macros (undefined, so
// empty), which would drop the symbol from the label. The dollar signs
// around them are escaped so the label keeps the literal LaTeX "$B_l$".
label variable lowgroup_l "Change in \$B_l\$ in Rounds $>$37"
label variable highgroup_l "Change in \$B_h\$ in Rounds $>$37"
label variable xdiff_vl_l "Change in $\omega_l$ in Rounds $>$37"
label variable xdiff_vh_l "Change in $\omega_h$ in Rounds $>$37"


esttab BL_high NG_high SF_high OG_high using "$text/files/reg/learning_modelest_all.tex", se ///
mtitles("Baseline" "NoGroup" "SignalFirst"  "OneGroup") ///
eqlabels(none) ///
order() ///
booktabs  star(* 0.10 ** 0.05 *** 0.01) ///
nonumbers nonotes ///
substitute(\_ _) ///
addnotes("Bootstrapped standard errors (clustered at the subject level) in parentheses." ///
 "$^{***}$1\%, $^{**}$5\%, $^{*}$10\% significance." ///
  "Subjects with MSE less than or equal to 200." ///
)  label replace


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//// Table 10: Actual vs. Adjusted Predictions
//// algorithmic approach to compute counterfactuals in the final section
// Build the working data set for the counterfactual exercise:
// a probability weight for every integer value, plus the Baseline rows and
// a few scalar benchmarks broadcast to every row.
clear
// Grid of integer values 1..100.
set obs 100
gen value = _n 
// compute p(v) 
// Equal-weight mixture of two normals (means 40 and 60, sd 10), integrated
// over the unit interval around each integer value.
gen prob_value = 1/2*(normal((value +0.5 - 40)/10) - normal((value -0.5 - 40)/10)) +1/2*(normal((value +0.5 - 60)/10) - normal((value -0.5 - 60)/10))
// Rescale so the weights sum to one over the grid.
egen tt = sum(prob_value)
replace prob_value = prob_value/tt
drop tt
// Stack the experimental data and the precomputed measures under the grid.
append using "$data/eoydata_late.dta"
append using "$data/measures_late_combined.dta"
// gddiff / msediff: largest non-missing (para - opt) difference, written to every row.
gen foo = gdpara - gdopt
egen gddiff = max(foo)
drop foo
gen foo = msepara - mseopt
egen msediff = max(foo)
drop foo
// Rebuild gddata / msedata as constants (their maximum) on every row.
gen foo = gddata
drop gddata
egen gddata = max(foo)
drop foo
gen foo = msedata
drop msedata
egen msedata = max(foo)
drop foo
// Drop every row that carries a gdpara value
// (presumably the rows from the measures file -- TODO confirm).
keep if gdpara ==.
// & binds tighter than |: keeps filtered Baseline rows, plus any row with
// missing highgroup (which includes the value-grid rows created above).
keep if treatmentname == "Baseline" & filteredsample ==1| highgroup == .
// Regressors for the no-constant model estimated in the loop below.
gen diffv_h_bl = diff_v*highgroup
gen diffv_l_bl = diff_v*(1-highgroup)
keep guess value highgroup prob_value diff diffv_h_bl diffv_l_bl d_bl mean_theory msediff gddiff msediff gddata msedata
// Copy each value's probability weight onto all rows with that value, then
// drop the rows that carried the weight originally (the grid rows).
rename prob_value foo
egen prob_value = max(foo), by(value)
keep if foo ==.
drop foo

// Low-group indicator and placeholders that the loop overwrites.
gen lgroup = highgroup==0
gen alpha = -999
gen bias = -999




// 500 random sample splits. In each one the model is estimated on split 1,
// and actual vs. adjusted guesses are evaluated on split 2. The per-split
// statistics are stored as constants gd<i>, altgd<i>, mse<i>, altmse<i>,
// baymse<i>, baygd<i> on the full data set.
set seed 666
 forvalues i=1/500{
preserve
// Random split; svar == 1 is used for estimation, svar == 2 for evaluation.
splitsample, generate(svar)
reg diff diffv_h_bl diffv_l_bl d_bl highgroup if svar ==1, nocons 
matrix b = e(b)
// Slopes become group-specific alpha; intercepts become group-specific bias.
replace alpha = b[1,1] if highgroup ==1
replace alpha = b[1,2] if highgroup ==0
replace bias = b[1,3] + b[1,4] if highgroup ==1
replace bias = b[1,3] if highgroup ==0
// Back out the implied signal from the guess.
gen signal = (guess - bias - alpha*(mean_theory))/(1- alpha)
// Signal variance: mean squared (signal - value) over split 1 only.
gen serror = (signal - value)^2
replace serror = . if svar ==2
egen var_signal = mean(serror)
drop serror
// Two alternative weights on the prior: variance 200 (paired with a prior
// mean of 50 below) and variance 100 (paired with mean_theory below).
gen optalpha = var_signal/(var_signal + 200)
gen bayalpha = var_signal/(var_signal + 100)
// Everything from here on uses split 2 only.
keep if svar ==2
gen altguess = optalpha*50 +(1-optalpha)*signal
gen altsqerror = (altguess - value)^2
gen bayguess = bayalpha*mean_theory +(1-bayalpha)*signal
gen baysqerror = (bayguess - value)^2
gen sqerror = (guess - value)^2
// Mean squared errors over split 2 (computed before the filter below).
egen altmse = mean(altsqerror)
egen mse = mean(sqerror)
egen baymse = mean(baysqerror)
// Keep only values with at least 10 low-group and 10 high-group rows.
egen vlcount = sum(lgroup), by(value)
egen vhcount = sum(highgroup), by(value)
keep if vlcount>9 & vhcount >9
// Cell means by value and group.
collapse prob_value guess altguess bayguess mse altmse baymse lgroup, by(value highgroup)
// Renormalise the value weights within group over the surviving values.
egen tt = sum(prob_value), by(highgroup)
gen prob_valuesy = prob_value/tt
// Copy the low-group mean onto both rows of each value. foo is zero on
// high-group rows, so max() picks the low-group number
// (assumes these means are not negative -- TODO confirm).
gen foo = guess*(lgroup==1)
egen lguess = max(foo), by(value)
drop foo
gen foo = altguess*(lgroup==1)
egen altlguess = max(foo), by(value)
drop foo
gen foo = bayguess*(lgroup==1)
egen baylguess = max(foo), by(value)
drop foo
drop if highgroup==0
// High-minus-low gap at each value, for the actual and both adjusted guesses.
gen diff  = guess - lguess
gen altdiff  = altguess - altlguess
gen baydiff  = bayguess - baylguess
// Probability-weighted sum of the gaps across values.
gen baz = diff *prob_valuesy
egen gd = sum(baz)
drop baz
gen baz = altdiff *prob_valuesy
egen altgd = sum(baz)
drop baz
gen baz = baydiff *prob_valuesy
egen baygd = sum(baz)
keep mse altmse gd altgd baygd baymse
// Each local takes the value in the first row, so the statistics survive -restore-.
local lmse = mse
local laltmse = altmse
local lgd = gd
local laltgd = altgd
local lbaygd = baygd
local lbaymse = baymse
restore
// Store this split's statistics as constants on the original data.
gen gd`i' = `lgd' 
gen altgd`i' = `laltgd' 
gen mse`i' = `lmse' 
gen altmse`i' = `laltmse' 
gen baymse`i' = `lbaymse' 
gen baygd`i' = `lbaygd'
 }

// The per-split statistics are constants, so a single row holds all of them.
keep if _n ==1
keep gd* mse* altgd* altmse* baygd* baymse*
generate id =1

// One row per split.
reshape long gd altgd altmse mse baygd baymse, i(id) j(index)  

// Share of splits in which each adjusted statistic is below the actual one.
preserve
local adjusted altgd altmse baygd baymse
local actual   gd    mse    gd    mse
forvalues k = 1/4 {
    local adj : word `k' of `adjusted'
    local act : word `k' of `actual'
    generate dummy = `adj' < `act'
    summarize dummy
    drop dummy
}
restore

// Averages across splits.
collapse gddata msedata altgd altmse baygd baymse
	

